{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 28,
   "metadata": {},
   "outputs": [],
   "source": [
    "# !pip install llama-index-llms-cleanlab llama-index llama-index-embeddings-huggingface"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Setup LLM"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "metadata": {},
   "outputs": [],
   "source": [
    "import os\n",
    "from dotenv import load_dotenv\n",
    "load_dotenv()\n",
    "\n",
    "from llama_index.llms.cleanlab import CleanlabTLM\n",
    "\n",
    "\n",
    "options = {\n",
    "    \"model\": \"gpt-4o\",\n",
    "    \"max_tokens\": 256,\n",
    "    \"log\": [\"explanation\"],\n",
    "}\n",
    "\n",
    "llm = CleanlabTLM(api_key=os.environ[\"CLEANLAB_API_KEY\"], options=options)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "NVIDIA's ticker symbol is NVDA.\n"
     ]
    }
   ],
   "source": [
    "response = llm.complete(\"What is NVIDIA's ticker symbol?\")\n",
    "print(response)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "{'trustworthiness_score': 0.9885545474223644,\n",
       " 'explanation': 'Did not find a reason to doubt trustworthiness.'}"
      ]
     },
     "execution_count": 19,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "response.additional_kwargs"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "metadata": {},
   "outputs": [],
   "source": [
    "from llama_index.core import Settings\n",
    "from llama_index.embeddings.huggingface import HuggingFaceEmbedding\n",
    "from llama_index.core import VectorStoreIndex, SimpleDirectoryReader\n",
    "\n",
    "from utils import (\n",
    "    setup_trustworthiness_handler,\n",
    "    display_response\n",
    ")\n",
    "\n",
    "\n",
    "Settings.llm = llm"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [],
   "source": [
    "Settings.embed_model = HuggingFaceEmbedding(\n",
    "    model_name=\"BAAI/bge-small-en-v1.5\"\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 21,
   "metadata": {},
   "outputs": [],
   "source": [
    "from typing import Dict, List, ClassVar\n",
    "from llama_index.core.instrumentation.events import BaseEvent\n",
    "from llama_index.core.instrumentation.event_handlers import BaseEventHandler\n",
    "from llama_index.core.instrumentation import get_dispatcher\n",
    "from llama_index.core.instrumentation.events.llm import LLMCompletionEndEvent\n",
    "\n",
    "\n",
    "class GetTrustworthinessScoreAndReasoning(BaseEventHandler):\n",
    "    events: ClassVar[List[BaseEvent]] = []\n",
    "    trustworthiness_score: float = 0.0\n",
    "    reasoning: str = \"\"\n",
    "\n",
    "    @classmethod\n",
    "    def class_name(cls) -> str:\n",
    "        \"\"\"Class name.\"\"\"\n",
    "        return \"GetTrustworthinessScoreAndReasoning\"\n",
    "\n",
    "    def handle(self, event: BaseEvent) -> Dict:\n",
    "        if isinstance(event, LLMCompletionEndEvent):\n",
    "            self.trustworthiness_score = event.response.additional_kwargs[\n",
    "                \"trustworthiness_score\"\n",
    "            ]\n",
    "            self.reasoning = event.response.additional_kwargs[\n",
    "                \"explanation\"\n",
    "            ]\n",
    "            self.events.append(event)\n",
    "\n",
    "\n",
    "# Root dispatcher\n",
    "root_dispatcher = get_dispatcher()\n",
    "\n",
    "# Register event handler\n",
    "event_handler = GetTrustworthinessScoreAndReasoning()\n",
    "root_dispatcher.add_event_handler(event_handler)\n",
    "\n",
    "def display_response(response):\n",
    "    response_str = response.response\n",
    "    trustworthiness_score = event_handler.trustworthiness_score\n",
    "    reasoning = event_handler.reasoning\n",
    "    print(f\"Response: {response_str}\")\n",
    "    print(f\"Trustworthiness score: {round(trustworthiness_score, 2)}\")\n",
    "    print(f\"Reasoning: {reasoning}\")\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 22,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Optional: Define `display_response` helper function\n",
    "\n",
    "\n",
    "# This method presents formatted responses from our TLM-based RAG pipeline. It parses the output to display both the text response itself and the corresponding trustworthiness score.\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 23,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Started parsing the file under job_id 3e4a97ad-7b24-45e2-af2f-45c549e10e88\n"
     ]
    }
   ],
   "source": [
    "# LlamaParse PDF reader for PDF Parsing\n",
    "from llama_parse import LlamaParse\n",
    "\n",
    "documents = LlamaParse(result_type=\"markdown\").load_data(\n",
    "    \"/Users/akshay/Eigen/ai-engineering-hub/trustworthy-rag/docs/dspy.pdf\"\n",
    ")\n",
    "# Started parsing the file under job_id b76a572b-d2bb-42ae-bad9-b9810049f1af"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 24,
   "metadata": {},
   "outputs": [],
   "source": [
    "index = VectorStoreIndex.from_documents(documents)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 25,
   "metadata": {},
   "outputs": [],
   "source": [
    "query_engine = index.as_query_engine()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 26,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "DSPy is a framework that abstracts the process of prompting and fine-tuning language models by using natural language signatures. These signatures are typed declarations of functions that specify what a text transformation needs to do, rather than how to prompt a specific language model to achieve that behavior. DSPy signatures are used to create modules, such as the Predict module, which can be instantiated to perform tasks like question-answering. DSPy also includes more sophisticated modules that generalize prompting techniques, allowing for the creation of modular functions that support any signature. The framework aims to improve the quality of simple programs by automating and optimizing the prompting process without the need for hand-crafted prompts.\n"
     ]
    }
   ],
   "source": [
    "response = query_engine.query(\"What is is DSPy?\")\n",
    "print(response)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 27,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Response: DSPy is a framework that abstracts the process of prompting and fine-tuning language models by using natural language signatures. These signatures are typed declarations of functions that specify what a text transformation needs to do, rather than how to prompt a specific language model to achieve that behavior. DSPy signatures are used to create modules, such as the Predict module, which can be instantiated to perform tasks like question-answering. DSPy also includes more sophisticated modules that generalize prompting techniques, allowing for the creation of modular functions that support any signature. The framework aims to improve the quality of simple programs by automating and optimizing the prompting process without the need for hand-crafted prompts.\n",
      "Trustworthiness score: 0.96\n",
      "Reasoning: Did not find a reason to doubt trustworthiness.\n"
     ]
    }
   ],
   "source": [
    "display_response(response)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "env_gen",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.11.11"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
